import pandas as pd

def extract_top_documents(num_topics, num_docs=None, *, data_dir='../data'):
    """Write the highest-weighted documents per topic and subreddit to a CSV.

    Reads ``{data_dir}/doc_topic_df_{num_topics}_topics.csv``, reshapes its
    ``topic_<n>`` columns into long form (one row per post/topic pair, with
    the cell value stored in a ``weight`` column and the numeric suffix in
    ``topic_num``), keeps the ``num_docs`` rows with the largest weight inside
    every ``(topic_num, subreddit)`` group, and writes the result to
    ``{data_dir}/{num_topics}_topics_top_{num_docs}_docs.csv``.

    The input file must provide a ``post_id`` column (used as the row
    identifier by ``pd.wide_to_long``), a ``subreddit`` column and the
    ``topic_<n>`` columns.

    Args:
        num_topics: Value interpolated into the input and output file names.
        num_docs: Number of rows to keep per ``(topic_num, subreddit)``
            group. When ``None`` the function returns immediately without
            reading or writing anything.
        data_dir: Directory holding the input file and receiving the output
            file. The default ``'../data'`` is resolved relative to the
            current working directory.

    Returns:
        None. The selection is only written to disk.
    """
    if num_docs is None:
        # Nothing to select, so skip the (potentially large) CSV read.
        return

    doc_topic_df = pd.read_csv(f'{data_dir}/doc_topic_df_{num_topics}_topics.csv')

    # One row per (post_id, topic_num); the former topic_<n> value becomes `weight`.
    long_df = (
        pd.wide_to_long(doc_topic_df, stubnames='topic', sep='_', i='post_id', j='topic_num')
        .reset_index()
        .rename(columns={'topic': 'weight'})
    )

    # Sort once (groups ascending, weight descending inside each group) and
    # take the first `num_docs` rows of every group.  Unlike
    # `groupby(...).apply(...)`, `head` never strips the grouping columns, so
    # `topic_num` and `subreddit` stay in the output on every pandas version.
    group_cols = ['topic_num', 'subreddit']
    ranked = long_df.sort_values(group_cols + ['weight'], ascending=[True, True, False])
    top_docs = ranked.groupby(group_cols, sort=False).head(num_docs).reset_index(drop=True)

    top_docs.to_csv(f'{data_dir}/{num_topics}_topics_top_{num_docs}_docs.csv', index=False)


if __name__ == '__main__':
    # Script entry point: keep the 30 highest-weighted documents per group
    # for the 10-topic document/topic table.
    extract_top_documents(num_topics=10, num_docs=30)